* Start from a clean session before configuring paths.
clear all

* Platform selector. Only "linux" is configured below.
global system "linux"

if "${system}" == "linux" {
	* Root folder of the code base and the path separator for this platform.
	global code "/"
	global s "/"
}
else {
	* Without this guard an unconfigured platform leaves ${code} and ${s} empty,
	* so the script would run a relative "_set-path.do" and cd to "/DataImport"
	* without any warning.
	display as error "unsupported system: ${system} -- define globals code and s for it"
	exit 198
}

* Run the project path-setup script quietly, then move into the import folder.
* NOTE(review): ${embsRaw} and ${cached}, used further down, are presumably
* defined by _set-path.do -- confirm.
run "${code}${s}_set-path.do"
cd "${code}/DataImport"


*** Fannie data
* Load the raw Fannie loan-level file, replacing whatever is in memory.
use "${embsRaw}/fannie_loanlevel.dta", clear

* De-duplicate so that each loan appears exactly once.
* The sort is -stable- so that ties keep their original file order; a plain
* -sort- orders ties arbitrarily, which would make the surviving row (the last
* one in each group) differ from run to run whenever duplicates are not
* identical in every variable.
sort loanseqnum_id cusip_id, stable
* Keep the last row of each loan/CUSIP pair (original note: drops almost all
* duplicates, a very small number of rows).
by loanseqnum_id cusip_id: keep if _n == _N
* Keep the last row per loan, i.e. the one under its highest cusip_id
* (original note: drops a very small number of rows).
by loanseqnum_id: keep if _n == _N

* Merge with security-level data on CUSIP. Loans without a matching security
* are kept (master); securities without loans are not brought in.
merge m:1 cusip_id using "${cached}/embs-sec-cleaned.dta", nogen keep(master matched) keepusing(tba embs_product issue_amt)
* Remove all spaces from the string codes before comparing them.
foreach x of varlist tbaeligcode_id embs_product {
	replace `x' = subinstr(`x', " ", "", .)
}
* Discard loans whose TBA eligibility code is "#".
drop if tbaeligcode_id == "#"

* Time variable: monthly date set two months before the first-payment month.
* NOTE(review): presumably a proxy for the origination month -- confirm.
gen orig_ym = mofd(firstpaymt_date) - 2  
format orig_ym %tm

save "${cached}/embs-fannie-cleaned.dta", replace


*** Freddie Data
* Load the raw Freddie loan-level file, replacing whatever is in memory.
use "${embsRaw}/freddie_loanlevel.dta", clear

* Attach security-level fields by CUSIP. Loans without a matching security are
* retained; securities without loans are not added.
merge m:1 cusip_id using "${cached}/embs-sec-cleaned.dta", ///
	keepusing(tba embs_product issue_amt) keep(master matched) nogenerate

* Squeeze every space out of the string codes, then keep only loans whose
* TBA eligibility code is something other than "#".
foreach strvar of varlist tbaeligcode_id embs_product {
	replace `strvar' = subinstr(`strvar', " ", "", .)
}
keep if tbaeligcode_id != "#"

* Trim the trailing 8 characters of seller_id and remove any spaces that remain.
* NOTE(review): per the original comment, those 8 characters hold the seller's
* state and ZIP code -- confirm against the raw file layout.
replace seller_id = subinstr(substr(seller_id, 1, length(seller_id) - 8), " ", "", .)

* Time variable: monthly date set two months before the first-payment month.
generate orig_ym = mofd(firstpaymt_date) - 2
format %tm orig_ym

save "${cached}/embs-freddie-cleaned.dta", replace



